******************************************************************************************** * The following SAS code accompanies the article: * Atkins, D. C., & Gallop, R. J. (2007). Re-thinking how family researchers model infrequent * outcomes: A tutorial on count regression and zero-inflated models. Journal of Family * Psychology. * * ******************************************************************************************** ; options formdlim=' ' ls=90 ps=50 ; data mardas; input id gender msi das sex afc infidelity; cards; 1 1 4 70 61 65 0 1 2 3 92 64 69 0 2 1 7 45 69 69 0 2 2 4 49 68 69 0 3 1 4 79 49 49 0 3 2 0 90 56 57 0 4 1 8 92 66 56 0 4 2 2 70 68 67 0 5 1 2 87 42 65 0 5 2 3 75 56 59 0 6 1 6 67 49 69 0 6 2 5 66 64 72 0 7 1 0 81 73 69 0 7 2 0 91 61 65 0 8 1 2 102 58 54 1 8 2 0 75 61 63 1 9 1 6 71 35 63 1 9 2 2 110 56 67 1 10 1 0 112 42 49 0 10 2 0 91 54 57 0 11 1 4 88 64 62 0 11 2 2 98 61 63 0 12 1 7 95 69 63 0 12 2 2 109 56 61 0 13 1 1 92 69 58 0 13 2 0 102 56 61 0 14 1 3 103 79 52 0 14 2 0 85 64 61 0 15 1 3 92 55 69 0 15 2 2 108 46 54 0 16 1 1 113 73 60 0 16 2 1 91 56 65 0 17 1 5 90 61 69 0 17 2 3 92 56 61 0 18 1 6 88 73 60 1 18 2 4 71 54 72 1 19 1 3 88 73 58 0 20 1 5 95 61 62 0 20 2 6 70 75 72 0 21 1 0 59 66 74 0 21 2 1 71 52 72 0 22 1 6 71 58 69 0 22 2 6 40 75 69 0 23 1 9 68 73 65 1 23 2 11 59 75 76 1 24 1 7 71 73 74 0 24 2 2 87 64 57 0 25 1 2 84 66 65 0 25 2 1 89 64 65 0 26 1 1 76 66 74 0 26 2 3 86 64 65 0 27 1 6 79 73 69 0 27 2 2 82 50 65 0 28 1 6 72 35 62 0 28 2 5 91 40 65 0 29 1 6 89 66 69 0 29 2 4 82 40 69 0 30 1 6 95 73 49 0 30 2 1 113 56 65 0 31 1 4 95 46 62 0 31 2 4 63 48 57 0 32 1 3 75 51 65 0 32 2 0 93 40 54 0 33 1 0 82 69 62 0 33 2 4 96 68 76 0 34 1 4 56 61 69 0 34 2 5 57 68 69 0 35 1 8 76 64 63 0 35 2 0 100 52 54 0 36 1 4 62 66 62 1 36 2 0 88 68 57 1 37 1 1 97 58 54 0 37 2 3 87 75 76 0 38 1 1 83 64 52 0 38 2 2 94 58 63 0 39 1 0 98 55 56 0 39 2 0 89 44 57 0 40 1 0 94 46 54 0 40 2 0 99 50 63 0 41 1 0 81 79 60 0 41 2 1 91 54 52 0 42 1 0 94 55 62 0 42 2 0 83 56 57 0 43 1 5 70 61 74 0 43 2 2 56 61 76 0 44 1 1 101 35 63 0 44 2 8 93 44 65 0 45 1 4 77 55 63 0 45 2 2 83 61 72 0 46 1 2 93 69 54 0 46 2 2 82 75 54 0 47 1 3 85 58 58 1 47 2 7 96 75 76 1 48 1 5 82 51 65 0 48 2 1 78 50 69 0 49 1 5 72 49 63 0 49 2 5 70 54 63 0 50 1 0 94 42 62 0 50 2 0 87 54 67 0 51 1 0 103 66 52 0 51 2 0 81 58 65 0 52 1 2 70 66 62 1 52 2 4 49 68 67 1 53 1 6 81 58 58 0 53 2 4 96 58 69 0 54 1 8 68 69 69 0 54 2 4 79 61 76 0 55 1 9 97 58 74 1 55 2 7 98 56 76 1 56 1 3 94 55 54 0 56 2 3 102 68 57 0 57 1 7 105 61 65 1 57 2 8 86 75 72 1 58 1 6 93 55 46 1 58 2 6 57 61 65 1 59 1 5 69 61 52 0 59 2 7 92 61 67 0 60 1 4 93 66 63 0 60 2 3 80 64 72 0 61 1 0 101 61 56 0 61 2 4 86 68 72 0 62 1 2 98 64 52 0 62 2 2 93 68 65 0 63 1 0 95 42 62 0 63 2 0 89 52 65 0 64 1 9 74 73 62 1 64 2 0 88 56 57 1 65 1 4 70 66 49 1 65 2 4 47 64 67 1 66 1 0 97 66 63 0 66 2 2 87 46 54 0 67 1 7 64 69 69 0 67 2 1 99 56 69 0 68 1 0 52 61 65 1 68 2 0 63 68 76 1 69 1 7 69 66 74 1 69 2 9 77 61 69 1 70 1 6 88 69 65 1 70 2 5 91 61 57 1 71 2 0 74 58 63 0 72 1 4 89 49 63 0 72 2 7 96 58 67 0 73 1 5 76 79 65 0 73 2 6 95 75 72 0 74 1 5 95 64 46 0 74 2 3 95 52 63 0 75 1 0 105 61 60 0 75 2 2 93 48 54 0 76 1 0 102 49 58 0 76 2 1 78 40 52 0 77 1 1 94 64 63 0 77 2 0 88 44 45 0 78 1 6 87 73 58 0 78 2 1 100 58 61 0 79 1 4 81 79 65 0 79 2 4 95 68 59 0 80 1 0 97 46 58 0 80 2 0 95 34 52 0 81 1 4 85 49 54 0 81 2 0 93 61 59 0 82 1 7 49 66 74 0 82 2 1 74 75 72 0 83 1 2 70 69 69 0 83 2 2 89 68 65 0 84 1 0 107 53 62 0 84 2 0 91 64 54 0 85 1 8 81 73 74 0 85 2 5 83 52 61 0 86 1 0 78 73 60 0 86 2 0 107 52 61 0 87 1 0 96 64 62 0 87 2 3 106 61 52 0 88 1 5 74 79 65 0 88 2 0 109 68 52 0 89 1 0 91 55 69 0 89 2 0 84 68 72 0 90 1 10 68 64 60 0 90 2 4 64 54 63 0 91 1 1 92 55 58 0 91 2 2 108 58 57 0 92 1 0 103 58 46 0 92 2 2 88 68 63 0 93 1 0 96 69 62 0 93 2 2 74 61 65 0 94 1 3 90 73 69 0 95 1 1 86 55 69 0 95 2 2 84 58 57 0 96 1 3 79 66 69 0 96 2 0 99 56 63 0 97 1 0 72 58 60 0 97 2 3 72 68 67 0 98 1 3 91 69 69 0 98 2 4 83 68 65 0 99 1 6 57 64 69 0 99 2 5 69 56 69 0 100 1 6 101 61 58 0 100 2 8 96 68 63 0 101 1 0 84 51 60 0 101 2 5 83 46 57 0 102 1 6 85 53 62 0 102 2 7 76 64 76 0 103 1 3 77 61 65 0 103 2 0 88 68 72 0 104 1 0 93 53 62 0 104 2 4 80 54 65 0 105 1 3 84 42 63 0 105 2 5 97 54 69 0 106 1 0 89 61 63 0 106 2 0 91 75 69 0 107 1 4 87 49 74 0 107 2 1 87 64 65 0 108 1 0 103 53 56 0 108 2 6 94 44 45 0 109 1 4 93 35 58 0 109 2 2 94 68 67 0 110 1 5 53 55 65 0 110 2 4 79 68 72 0 111 1 7 92 61 65 0 111 2 7 87 46 59 0 112 1 0 76 42 54 0 112 2 4 92 58 65 0 113 1 1 89 64 63 0 113 2 1 62 64 69 0 114 1 2 74 61 62 0 114 2 5 66 68 72 0 115 1 8 67 79 74 1 115 2 5 75 64 72 1 116 1 3 70 64 74 1 116 2 4 71 75 72 1 117 1 0 90 55 58 0 117 2 1 94 58 67 0 118 1 6 83 55 63 1 118 2 5 54 75 72 1 119 1 8 74 58 74 0 119 2 6 70 68 67 0 120 1 1 63 58 65 0 120 2 1 72 58 65 0 121 1 3 93 61 63 0 121 2 2 102 61 65 0 122 1 8 95 73 60 0 122 2 6 55 64 63 0 123 1 7 103 69 60 0 123 2 2 94 68 69 0 124 1 3 99 61 65 0 124 2 7 85 34 54 0 125 1 0 90 66 56 0 125 2 0 103 68 69 0 126 1 5 75 66 63 0 126 2 3 71 68 63 0 127 1 0 115 64 49 0 127 2 1 83 52 69 0 128 1 4 93 73 65 0 128 2 0 99 52 54 0 129 1 4 93 55 63 0 129 2 0 100 34 61 0 130 1 2 93 73 62 0 130 2 3 86 58 67 0 131 1 5 95 58 69 1 131 2 9 62 58 67 1 132 1 2 103 35 62 0 132 2 6 85 44 61 0 133 1 8 58 61 65 0 133 2 8 58 64 76 0 ; run; proc contents data=mardas position short; quit; proc ttest data=mardas; class gender;* infidelity; var msi; quit; proc ttest data=mardas; class infidelity; var msi; quit; proc freq data=mardas; quit; proc means data=mardas; var das sex afc; output out=o mean=mndas mnsex mnafc; quit; proc sql; create table mardas as select a.*, b.* from mardas a left join o b on a.id and a.gender; quit; proc means data=mardas; quit; data mardas; set mardas; genderl = (gender=2); infidelityTRUE = (infidelity=1); das_c = das - mndas; sex_c = sex - mnsex; afc_c = afc - mnafc; das_c_inf = das_c* infidelityTrue; run; proc means data=mardas; quit; ods pdf file='D:\Smstat\Datkins\zip and zinb in sas.pdf'; proc genmod data=mardas; title 'Poisson Regression'; model msi = das_c infidelityTRUE genderl afc_c sex_c das_c_inf / link=log dist=poisson; quit;title; proc genmod data=mardas; title 'Negative Binomial Model'; model msi = das_c infidelityTRUE genderl afc_c sex_c das_c_inf / link=log dist=negbin; quit;title; data mardas; set mardas; RESPONSE = MSI; run; proc nlmixed data=mardas ; title 'ZIP Model'; /* a0 = intercept of the logistic model of the inflation prob, a1 is that slope, b0-b1 are the regression coefficients for the Poisson mean */ parameters a0=0 a1=0 a2=0 a3=0 a4=0 a5=0 a6=0 b0=0 b1=0 b2=0 b3=0 b4=0 b5=0 b6=0; /* linear predictor for the inflation probability */ linpinfl = a0 +a1*das_c+ a2*infidelityTRUE +a3*genderl+a4*afc_c+a5*sex_c+a6*das_c_inf ; /* infprob = inflation probability for zeros */ /* = logistic transform of the linear predictor */ infprob = 1/(1+exp(-linpinfl)); /* Poisson mean */ lambda = exp(b0 +b1*das_c+ b2*infidelityTRUE +b3*genderl+b4*afc_c+b5*sex_c+b6*das_c_inf ); /* Build the ZIP log likelihood */ if response=0 then ll = log(infprob + (1-infprob)*exp(-lambda)); else ll = log((1-infprob)) + RESPONSE*log(lambda) - lgamma(response+1) - lambda; model RESPONSE ~ general(ll); /* predict statement to get the predicted number of RESPONSES given the Poisson mean and the inflation probability */ predict (1-infprob)*lambda out = PREDICTED_RESPONSE; quit;title; data mardas; set mardas; density = MSI; run; proc nlmixed data=mardas; title 'ZINB Model'; parameters a0=0 a1=0 a2=0 a3=0 a4=0 a5=0 a6=0 b0=0 b1=0 b2=0 b3=0 b4=0 b5=0 b6=0; linpinfl = a0 +a1*das_c+ a2*infidelityTRUE +a3*genderl+a4*afc_c+a5*sex_c+a6*das_c_inf; psi = 1 / (1 + exp(linpinfl)); eta_nb = (b0 +b1*das_c+ b2*infidelityTRUE +b3*genderl+b4*afc_c+b5*sex_c+b6*das_c_inf); lambda = exp(eta_nb); p0 = psi + (1-psi)*exp(-(density+(1/k))*log(1+k*lambda)); p_else = (1-psi)* exp(lgamma(density+(1/k)) - lgamma(density+1) - lgamma(1/k) + density*log(k*lambda) - (density+(1/k))*log(1+k*lambda)); if density=0 then loglike = log(p0); else loglike = log(p_else); model density ~ general(loglike); quit;title; ods pdf close; proc nlmixed data=mardas qpoints=12; title 'ZINB Model with Random Effects'; footnote 'We fit only a Random Intercept in Zero only'; parameters a0=-1.5 a1=0.05 a2=-0.57 a3=-0.03 a4=-0.06 a5=-0.02 a6=-0.08 b0=1.38 b1=-0.01 b2=0.39 b3=-0.20 b4=0.01 b5=0 b6=0.01 v1=0.3; *v2=1.0; ***variance for second random effect if fitted***; u2=0; ****random effect 2 set to 0 since it is not fitted***; linpinfl = a0+u1 +a1*das_c+ a2*infidelityTRUE +a3*genderl+a4*afc_c+a5*sex_c+a6*das_c_inf; psi = 1 / (1 + exp(linpinfl)); eta_nb = (b0+u2 +b1*das_c+ b2*infidelityTRUE +b3*genderl+b4*afc_c+b5*sex_c+b6*das_c_inf); lambda = exp(eta_nb); p0 = psi + (1-psi)*exp(-(density+(1/k))*log(1+k*lambda)); p_else = (1-psi)* exp(lgamma(density+(1/k)) - lgamma(density+1) - lgamma(1/k) + density*log(k*lambda) - (density+(1/k))*log(1+k*lambda)); if density=0 then loglike = log(p0); else loglike = log(p_else); model density ~ general(loglike); * random u1 u2 ~normal([0,0],[v1,0,v2]) subject=ID; ***Use this line if we wanted to fit both random effects***; random u1 ~normal(0,v1) subject=ID; quit;title; footnote